R code for lecture 20

# read in galapagos flora data
gala<-read.table('ecol 563/galapagos.txt', header=T)
names(gala)
 
# fit normal model in area
out.norm1 <- lm(Species~Area, data=gala)
plot(Species~Area, data=gala)
abline(out.norm1, lty=2)
 
# fit normal model in log area
out.norm2 <- lm(Species~log(Area), data=gala)
plot(Species~log(Area), data=gala)
abline(out.norm2, lty=2)
 
# plot residuals to assess structural form of the predictor
plot(residuals(out.norm2)~log(Area), data=gala)
abline(h=0, lty=2)
# add a nonparametric smoother
lines(lowess(residuals(out.norm2)~log(gala$Area)), col=2)
 
# fit nonlinear normal model
out.norm3 <- nls(Species~a*Area^b, data=gala, start=list(a=1,b=.5))
summary(out.norm3)
plot(Species~Area, data=gala)
coef(out.norm3)
curve(coef(out.norm3)[1]*x^coef(out.norm3)[2], add=T, col=2)
 
# compare models fit so far
AIC(out.norm1, out.norm2, out.norm3)
 
# fit Poisson model
out.pois<-glm(Species~log(Area), data=gala, family=poisson)
 
# fit NB-2 model
library(MASS)
out.NB2<-glm.nb(Species~log(Area), data=gala)
 
library(gamlss)
# fit NB-1 model
out.NB1<-gamlss(Species~log(Area), data=gala, family=NBII)
# NB-2 model from gamlss for comparison
out.NB1a<-gamlss(Species~log(Area), data=gala, family=NBI)
 
# compare count models
AIC(out.pois, out.NB2, out.NB1, out.NB1a)
 
# plot the count models
plot(Species~log(Area), data=gala)
curve(exp(coef(out.NB1)[1]+coef(out.NB1)[2]*x), add=T, lty=2)
curve(exp(coef(out.pois)[1]+coef(out.pois)[2]*x), add=T, col=4)
curve(exp(coef(out.NB2)[1]+coef(out.NB2)[2]*x), add=T, col=2)
 
# fit lognormal model
out.lognorm <- lm(log(Species)~log(Area), data=gala)
plot(log(Species)~log(Area), data=gala)
abline(out.lognorm, lty=2)
 
# the AIC of this model is not comparable to the rest
AIC(out.lognorm)
 
# function to calculate correct AIC for comparisons with count models
lognorm.LL <- function(y,model,c=0) {
sigma2 <- sum(residuals(model)^2)/length(residuals(model))
prob <- ifelse(y==0, pnorm(log(y+c+0.5), mean=predict(model), sd=sqrt(sigma2)),
pnorm(log(y+c+0.5), mean=predict(model), sd=sqrt(sigma2))-pnorm(log(y+c-0.5), mean=predict(model), sd=sqrt(sigma2)))
LL <- sum(log(prob))
AIC <- -2*LL + 2*(length(coef(model))+1)
out <- data.frame(LL=LL, AIC=AIC)
print(out, row.names=F) }
 
# calculate AIC and LL
lognorm.LL(gala$Species,out.lognorm)
# compare with previous models
AIC(out.norm1, out.norm2, out.norm3, out.pois, out.NB2, out.NB1)
 
# fit square root normal model
out.sqrtnorm <- lm(sqrt(Species)~log(Area), data=gala)
 
# function to calculate correct AIC and LL for comparison with count models
sqrtnorm.LL <- function(y,model,c=0) {
 sigma2 <- sum(residuals(model)^2)/length(residuals(model))
 prob <- ifelse(y==0, pnorm(sqrt(y+c+0.5), mean=predict(model), sd=sqrt(sigma2)),
 pnorm(sqrt(y+c+0.5), mean=predict(model), sd=sqrt(sigma2)) -pnorm(sqrt(y+c-0.5), mean=predict(model), sd=sqrt(sigma2)))
 LL <- sum(log(prob))
 AIC <- -2*LL + 2*(length(coef(model))+1)
 out <- data.frame(LL=LL, AIC=AIC)
 print(out, row.names=F) }
 
# obtain AIC and compare with remaining models
sqrtnorm.LL(gala$Species,out.sqrtnorm)
AIC(out.norm1,out.norm2,out.norm3,out.pois,out.NB2,out.NB1)
lognorm.LL(gala$Species,out.lognorm)
 
# demonstrating that mean(log(x)) < log(mean(x))
curve(log(x),from=.8,to=3.5)
abline(h=0,lty=2)
points(c(1,3), c(log(1), log(3)))
segments(1,log(1),3, log(3), lty=2)
# plot log of mean of 1 and 3
points(2,log(2), pch=16, col=2, cex=.8)
text(2, log(2), 'log of mean', pos=2, col=2, cex=.8)
# plot mean of log 1 and log 3.
points(2,(log(1)+log(3))/2, col=4, cex=.8, pch=16)
text(2,(log(1)+log(3))/2, 'mean of log', col=4, cex=.8, pos=4)
R Code for Lecture 20 (Wednesday, October 31, 2012)